---
title: "Anthromes 12K Discrete Global Grid System -- Anthromes Classification"
author: "Nicolas Gauthier"
date: "Last knit on: `r Sys.Date()`"
output:
  pdf_document: 
    toc: yes
    latex_engine: xelatex
    highlight: pygments
  html_document: default
---

# Setup

Load **tidyverse** for data cleaning and this package, **anthromes**, for additional analysis and plotting functions.
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
devtools::load_all()
```

# Anthromes Classification

```{r}
# Per-cell fixed inputs. The compact column spec declares six columns:
# integer, integer, double, integer, integer, integer.
fixed_inputs <- read_csv(
  'data/derived_data/fixed_inputs.csv',
  col_types = 'iidiii'
)
```

```{r}
# Reshape one shard of renamed HYDE data to one row per id and time step, join
# the fixed inputs, and derive the columns handed to anthromes_classify().
#
# hyde:  data frame with an `id` column; every other column is named
#        '<var>.<time_step>' and is split on the '.'.
# fixed: data frame joined on `id` (presumably the source of `pot_veg` and
#        `area` -- confirm against the fixed inputs file).
#
# Returns the joined table with `trees`, `crops`, `grazing`, `rice`, `pop`,
# `irrigation`, `urban` and `used` added. Because of `.keep = 'unused'`, the
# columns consumed by those expressions are dropped.
prep_hyde <- function(hyde, fixed) {
  hyde %>%
    # long format: one row per id and '<var>.<time_step>' column
    pivot_longer(-id) %>%
    separate(name, into = c('var', 'time_step'), sep = '[.]') %>%
    # back to one column per variable, keyed by id and time_step
    pivot_wider(names_from = var, values_from = value) %>%
    left_join(fixed, by = 'id') %>%
    mutate(
      trees = pot_veg <= 8,
      crops = cropland / area,
      grazing = grazing / area,
      rice = ir_rice / area,
      pop = popc / area,
      irrigation = tot_irri / area,
      urban = uopp / area,
      used = urban + crops + grazing,
      .keep = 'unused'
    )
}

hyde <- readRDS('tmp/hyde_dgg') %>%
  rename_hyde()

# Classify in 200 shards (the 1:200 grouping vector is recycled over the rows)
# so that only one shard at a time is held in long format.
anthromes_hyde <- hyde %>%
  mutate(id = dgg_ids) %>%
  split(1:200) %>%
  map_dfr(function(shard) {
    anthromes_classify(prep_hyde(shard, fixed_inputs))
  })
```

```{r}
# Classify the baseline HYDE estimates into anthromes and save them as a wide
# table (one row per id, one column per time step).
hyde_dgg <- readRDS('hyde_dgg')

# Build the '<var>.<time_step>' column names in this chunk, so it does not
# depend on a `new_names` object created by some other chunk.
# NOTE(review): assumes the columns of `hyde_dgg` follow the same naming scheme
# as the upper/lower bound tables handled in the Uncertainties section.
var_names <- names(hyde_dgg) %>%
  str_extract(hyde_names_grepl)

time_steps <- names(hyde_dgg) %>%
  str_sub(start = str_length(var_names) + 5) %>%
  str_remove('_')

new_names <- paste(var_names, time_steps, sep = '.')

anthromes_dgg <- hyde_dgg %>%
  setNames(new_names) %>%
  mutate(id = dgg_ids) %>%
  # split into 200 shards (1:200 is recycled over the rows) to save memory
  split(1:200) %>%
  map_dfr(
    ~ gather(., key = 'name', value = 'value', -id) %>%
      separate(name, into = c('var', 'time_step'), sep = '[.]') %>%
      spread(var, value) %>%
      left_join(fixed_inputs, by = 'id') %>%
      mutate(
        trees = pot_veg <= 8,
        crops = cropland / area,
        grazing = grazing / area,
        rice = ir_rice / area,
        pop = popc / area,
        irrigation = tot_irri / area,
        urban = uopp / area,
        used = urban + crops + grazing,
        # drop the columns consumed by the expressions above
        .keep = 'unused'
      ) %>%
      anthromes_classify()
  )

## necessary to save ram, but not if run on cluster
write_csv(anthromes_dgg, 'anthromes_dgg2.csv')
gc()
read_csv('anthromes_dgg2.csv') %>%
  # factor levels from time_key fix the order of the time-step columns
  mutate(time_step = factor(time_step, levels = time_key$time_step)) %>%
  arrange(id, time_step) %>%
  spread(time_step, anthrome) %>%
  write_csv('anthromes_dgg.csv')
```

## Uncertainties
```{r}
# Upper-bound HYDE estimates: rename the columns, classify shard by shard, and
# save the result with one row per id and one column per time step.
hyde_dgg_upper <- readRDS('hyde_dgg_upper')

# Part of each raw column name matched by `hyde_names_grepl`.
var_names <- str_extract(names(hyde_dgg_upper), hyde_names_grepl)

# Substring starting at position str_length(var_names) + 5, with the first
# underscore removed.
time_steps <- str_remove(
  str_sub(names(hyde_dgg_upper), start = str_length(var_names) + 5),
  '_'
)

# Joined with '.', which is the separator the shards are split on below.
new_names <- paste(var_names, time_steps, sep = '.')

anthromes_dgg_upper <- hyde_dgg_upper %>%
  setNames(new_names) %>%
  mutate(id = dgg_ids) %>%
  # split into shards to save memory
  split(1:200) %>%
  map_dfr(function(shard) {
    long <- gather(shard, key = 'name', value = 'value', -id)
    long <- separate(long, name, into = c('var', 'time_step'), sep = '[.]')
    wide <- spread(long, var, value)
    joined <- left_join(wide, fixed_inputs, by = 'id')
    # NOTE(review): unlike the baseline classification chunk, no
    # `trees = pot_veg <= 8` column is created here -- confirm that
    # anthromes_classify() does not need it for the uncertainty runs.
    scaled <- mutate(
      joined,
      crops = cropland / area,
      grazing = grazing / area,
      rice = ir_rice / area,
      pop = popc / area,
      irrigation = tot_irri / area,
      urban = uopp / area,
      used = urban + crops + grazing,
      .keep = 'unused'
    )
    anthromes_classify(scaled)
  })

## necessary to save ram, but not if run on cluster
write_csv(anthromes_dgg_upper, 'anthromes_dgg_upper2.csv')
gc()
# Reload the long table, order the time steps by `time_key`, and write it wide.
read_csv('anthromes_dgg_upper2.csv') %>%
  mutate(time_step = factor(time_step, levels = time_key$time_step)) %>%
  arrange(id, time_step) %>%
  spread(time_step, anthrome) %>%
  write_csv('anthromes_dgg_upper.csv')
```



```{r}
# Lower-bound HYDE estimates: rename the columns, classify shard by shard, and
# save the result with one row per id and one column per time step.
hyde_dgg_lower <- readRDS('hyde_dgg_lower')

# Part of each raw column name matched by `hyde_names_grepl`.
var_names <- str_extract(names(hyde_dgg_lower), hyde_names_grepl)

# Substring starting at position str_length(var_names) + 5, with the first
# underscore removed.
time_steps <- str_remove(
  str_sub(names(hyde_dgg_lower), start = str_length(var_names) + 5),
  '_'
)

# Joined with '.', which is the separator the shards are split on below.
new_names <- paste(var_names, time_steps, sep = '.')

anthromes_dgg_lower <- hyde_dgg_lower %>%
  setNames(new_names) %>%
  mutate(id = dgg_ids) %>%
  # 200 shards; the 1:200 grouping vector is recycled over the rows
  split(1:200) %>%
  map_dfr(function(shard) {
    long <- gather(shard, key = 'name', value = 'value', -id)
    long <- separate(long, name, into = c('var', 'time_step'), sep = '[.]')
    wide <- spread(long, var, value)
    joined <- left_join(wide, fixed_inputs, by = 'id')
    # NOTE(review): unlike the baseline classification chunk, no
    # `trees = pot_veg <= 8` column is created here -- confirm that
    # anthromes_classify() does not need it for the uncertainty runs.
    scaled <- mutate(
      joined,
      crops = cropland / area,
      grazing = grazing / area,
      rice = ir_rice / area,
      pop = popc / area,
      irrigation = tot_irri / area,
      urban = uopp / area,
      used = urban + crops + grazing,
      .keep = 'unused'
    )
    anthromes_classify(scaled)
  })

## necessary to save ram, but not if run on cluster
write_csv(anthromes_dgg_lower, 'anthromes_dgg_lower2.csv')
gc()
# Reload the long table, order the time steps by `time_key`, and write it wide.
read_csv('anthromes_dgg_lower2.csv') %>%
  mutate(time_step = factor(time_step, levels = time_key$time_step)) %>%
  arrange(id, time_step) %>%
  spread(time_step, anthrome) %>%
  write_csv('anthromes_dgg_lower.csv')
```

```{r}
# Save the classification in wide form (one row per id, one column per time
# step). Writing the long `anthromes_dgg` table as-is would replace the wide
# 'anthromes_dgg.csv' produced by the classification chunk, and the following
# chunks read this file as all-integer columns and attach one geometry per row.
# NOTE(review): assumes `anthromes_dgg` is still the long id / time_step /
# anthrome table at this point -- confirm.
anthromes_dgg %>%
  mutate(time_step = factor(time_step, levels = time_key$time_step)) %>%
  arrange(id, time_step) %>%
  spread(time_step, anthrome) %>%
  write_csv('anthromes_dgg.csv')
```

```{r}
# Reload the saved classification, parsing every column as an integer.
anthromes_dgg <- read_csv(
  'anthromes_dgg.csv',
  col_types = cols(.default = col_integer())
)
```

```{r}
# Attach the cell geometries and export the classification as a shapefile.
# The sf functions are namespace-qualified because the setup chunk does not
# attach sf.
# NOTE(review): geometries are matched to rows by position, so this assumes
# `anthromes_dgg` has one row per feature of `dgg_land`, in the same order.
anthromes_dgg_shp <- anthromes_dgg %>%
  mutate(geometry = dgg_land$geometry) %>%
  sf::st_as_sf() %>%
  # EPSG:4326
  sf::st_set_crs(4326)

sf::write_sf(anthromes_dgg_shp, 'anthromes_dgg.shp')
```
